library(affy)
library(limma)
library(annotate)
library(hgu133plus2.db)
library(openxlsx)
library(ggplot2)
library(tidyverse)
library(escape)
library(GSVA)
library(pheatmap)
library(limma)
library(robustbase)
library(corrr)
library(tidyverse)homedir <- getwd()
setwd("raw_data/GSE4290_RAW/")
# Load the raw data (assuming .CEL files are in your working directory)
raw_data <- ReadAffy()
setwd(homedir)
# boxplot(exprs(raw_data))
# Preprocess the raw data with RMA
norm_data <- rma(raw_data)
# View the first few rows of the expression matrix
exprs_data <- exprs(norm_data)
head(exprs_data)
# Check the dimensions of the dataset
dim(exprs_data)
# Get probe annotations (Gene symbols for Affymetrix IDs)
gene_symbols <- getSYMBOL(rownames(exprs_data), "hgu133plus2.db") # hgu133plus2.db
# Add gene symbols as a new column to your data
exprs_data_with_genes <- data.frame(GeneSymbol = gene_symbols, exprs_data)
# Remove rows with NA gene symbols
exprs_data_with_genes <- exprs_data_with_genes[!is.na(exprs_data_with_genes$GeneSymbol), ]
exprs_data_with_genes <- exprs_data_with_genes[!duplicated(exprs_data_with_genes$GeneSymbol), ]
rownames(exprs_data_with_genes) <- exprs_data_with_genes$GeneSymbol
exprs_data_with_genes$GeneSymbol <- NULL
colnames(exprs_data_with_genes) <- sub("\\..*$",
"", colnames(exprs_data_with_genes))# Load the GEOquery library
library(GEOquery)
# Download the GEO dataset
gse <- getGEO("GSE4290", GSEMatrix = TRUE)
# If the dataset has multiple platforms (GPL), choose the first
gse <- gse[[1]]
# Access the metadata (phenoData or pData)
metadata <- pData(gse)
# View the first few rows of metadata
head(metadata)
write.xlsx(metadata,
"raw_data/metadata.xlsx",
rowNames = T)Exclude irrelevant columns
exprs_data_with_genes <- read.xlsx("raw_data/GSE4290_exprs_data_with_genes.xlsx",
rowNames = T)
metadata <- read.xlsx("raw_data/metadata.xlsx", rowNames = T)
# write.xlsx(phenoData(norm_data),
# "raw_data/metadata.xlsx",
# rowNames = T)
# Boxplot for normalized data
boxplot(exprs_data_with_genes, main = "Boxplot of Normalized Data")# Density plot to inspect the distribution
plotDensity(exprs_data_with_genes, main = "Density Plot of Normalized Data")# Assuming 'data' is your data frame
# Exclude columns where all values are unique
metadata_filtered <- metadata[, sapply(metadata,
function(x) length(unique(x)) != nrow(metadata))]
# View the filtered dataset
View(metadata_filtered)
metadata_filtered$ID <- rownames(metadata_filtered)
# metadata_filtered <- metadata_filtered[metadata_filtered$`Histopathological diagnostic:ch1` != "", ]
metadata_filtered_use <- metadata_filtered[, c("ID", "description",
"Histopathological.diagnostic:ch1")]
dim(metadata_filtered_use)## [1] 180 3
exprs_data_with_genes <- as.data.frame(t(exprs_data_with_genes))
exprs_data_with_genes <- merge(exprs_data_with_genes,
metadata_filtered_use,
by.x = "row.names",
by.y = "ID")
rownames(exprs_data_with_genes) <- exprs_data_with_genes$Row.names
exprs_data_with_genes <- dplyr::filter(exprs_data_with_genes,
`Histopathological.diagnostic:ch1` %in%
c("non-tumor", "glioblastoma, grade 4"))
dim(exprs_data_with_genes)## [1] 100 20827
meta <- exprs_data_with_genes[, c("Row.names", "description", "Histopathological.diagnostic:ch1")]
exprs_data_with_genes$Row.names <- NULL
exprs_data_with_genes$`Histopathological diagnostic:ch1` <- NULL
exprs_data_with_genes$description <- NULL
group <- meta$`Histopathological.diagnostic:ch1`
group[group!="non-tumor"] <- "glioblastoma, grade 4"
group <- factor(group,
levels = c("non-tumor", "glioblastoma, grade 4"))
exprs_data_with_genes$`Histopathological.diagnostic:ch1` <- NULL
pca <- prcomp(exprs_data_with_genes,
center = T, scale. = T)
## make a scree plot
pca.var <- pca$sdev^2
pca.var.per <- round(pca.var/sum(pca.var)*100, 1)
pca.var.per## [1] 21.0 10.9 6.1 4.3 3.6 2.7 2.4 2.3 2.1 1.8 1.5 1.4 1.3 1.3 1.2
## [16] 1.1 1.0 1.0 1.0 0.9 0.8 0.8 0.8 0.8 0.7 0.7 0.7 0.7 0.6 0.6
## [31] 0.6 0.6 0.6 0.6 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5
## [46] 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4
## [61] 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3
## [76] 0.3 0.3 0.3 0.3 0.3 0.3 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2
## [91] 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.0
pcs <- data.frame(PC1 = pca$x[, 1],
PC2 = pca$x[, 2],
Group = group)
plot <- ggplot(pcs,
aes(x = PC1, y = PC2,
color = Group)) +
geom_point(size = 3) +
ggtitle("Gene-based PCA plot of Healthy\nVs Tumour Samples") +
theme_bw() +
xlab(paste0("PC1: ", pca.var.per[1], "%")) +
ylab(paste0("PC2: ", pca.var.per[2], "%")) +
theme_minimal() +
scale_color_manual(values = c("blue", "red")) +
theme(aspect.ratio = 1)
plotDifferential gene expression
group <- as.character(group)
group[grep("non-tumor", group)] <- "normal"
group[grep("glioblastoma", group)] <- "grade4_glioblastoma"
groups <- factor(group)
Groups_df <- data.frame(Groups = groups)
model_mat <- model.matrix(~ 0 + groups)
colnames(model_mat) <- levels(groups)
fit <- lmFit(t(exprs_data_with_genes),
model_mat)
contrast_matrix <- makeContrasts(tumour_vs_normal = grade4_glioblastoma - normal,
levels = model_mat)
fit2 <- contrasts.fit(fit,
contrast_matrix)
fit2 <- eBayes(fit2)
top_table <- topTable(fit2, adjust.method = "fdr", number = Inf)
top_table %>%
head(10) %>%
knitr::kable(caption = "DGEs: Gliobastoma Stage 4 vs Non-tumor Samples",
"pipe")| logFC | AveExpr | t | P.Value | adj.P.Val | B | |
|---|---|---|---|---|---|---|
| RASAL1 | -1.7300812 | 8.751644 | -17.44895 | 0 | 0 | 62.46116 |
| SERTM1 | -2.7839304 | 6.036574 | -16.40250 | 0 | 0 | 57.95133 |
| KCTD4 | -1.9938252 | 7.829818 | -15.46134 | 0 | 0 | 53.77194 |
| AJAP1 | -1.3051305 | 6.777516 | -15.27319 | 0 | 0 | 52.92262 |
| UNC5C-AS1 | -2.2572062 | 5.647762 | -15.18011 | 0 | 0 | 52.50077 |
| NWD2 | -3.0421527 | 5.734739 | -15.17944 | 0 | 0 | 52.49770 |
| HTR2C | -2.4812919 | 5.450491 | -15.13076 | 0 | 0 | 52.27666 |
| FEZF2 | -2.0626006 | 5.402323 | -14.92729 | 0 | 0 | 51.34943 |
| SST | -4.0109431 | 7.805073 | -14.71609 | 0 | 0 | 50.38147 |
| IL12RB2 | -0.8916141 | 5.008674 | -14.62804 | 0 | 0 | 49.97626 |
Visualisation
# Create an annotation dataframe for the metadata
annotation_col <- data.frame(
Group = factor(meta$`Histopathological.diagnostic:ch1`), # 'metadata$Group' contains grouping information like tumor type
row.names = meta$Row.names # Ensure row names match your dataset
)
up_sig <- top_table[top_table$adj.P.Val < 0.05 & top_table$logFC > 0, ]
# Create a heatmap with the top differentially expressed genes
top_genes <- rownames(up_sig)[1:50] # Select top 50 genes
heatmap_data <- t(exprs_data_with_genes)[top_genes, ]
annot_colors=list(Group=c(`glioblastoma, grade` ="red",
`non-tumor`="blue"))
# Define the annotation and colors for the groups
ann_colors <- list(
Group = c("non-tumor" = "blue",
"glioblastoma, grade 4" = "red")
)
annotation_col$Group <- factor(annotation_col$Group,
levels = c("non-tumor", "glioblastoma, grade 4"))
pheatmap(heatmap_data,
annotation_col = annotation_col,
annotation_colors = ann_colors,
main = "DGEs: Gliobastoma Stage 4 vs Non-tumor Samples",
scale = "row",
cluster_rows = TRUE, cluster_cols = TRUE)## [1] TRUE
gsvaPar <- ssgseaParam(t(exprs_data_with_genes), GS.hallmark)
gsva.es <- gsva(gsvaPar, verbose=FALSE)## No annotation package name available in the input data object.
## Attempting to directly match identifiers in data to gene sets.
## [1] "Calculating ranks..."
## [1] "Calculating absolute values from ranks..."
## [1] "Normalizing..."
## [1] 50 100
group <- as.character(group)
group[grep("non-tumor", group)] <- "normal"
group[grep("glioblastoma", group)] <- "grade4_glioblastoma"
groups <- factor(group)
Groups_df <- data.frame(Groups = groups)
model_mat <- model.matrix(~ 0 + groups)
colnames(model_mat) <- levels(groups)
fit <- lmFit(gsva.es,
model_mat)
contrast_matrix <- makeContrasts(tumour_vs_normal = grade4_glioblastoma - normal,
levels = model_mat)
fit2 <- contrasts.fit(fit,
contrast_matrix)
fit2 <- eBayes(fit2)
top_table <- topTable(fit2, adjust.method = "fdr", number = Inf)
top_table_up_sig <- top_table[top_table$adj.P.Val < 0.0001, ]
dim(top_table_up_sig)## [1] 36 6
top_table_up_sig %>%
head(10) %>%
knitr::kable(caption = "Differentialy affected pathways in tumors vs non-tumor samples",
"pipe")| logFC | AveExpr | t | P.Value | adj.P.Val | B | |
|---|---|---|---|---|---|---|
| HALLMARK_NOTCH_SIGNALING | 0.0994159 | 0.3303655 | 11.546121 | 0 | 0 | 35.29995 |
| HALLMARK_APOPTOSIS | 0.1014003 | 0.4121682 | 9.668461 | 0 | 0 | 25.84981 |
| HALLMARK_ANGIOGENESIS | 0.1970001 | 0.3107932 | 9.624302 | 0 | 0 | 25.62703 |
| HALLMARK_P53_PATHWAY | 0.0671140 | 0.4260906 | 9.565442 | 0 | 0 | 25.33016 |
| HALLMARK_E2F_TARGETS | 0.2134330 | 0.4539150 | 9.559254 | 0 | 0 | 25.29895 |
| HALLMARK_DNA_REPAIR | 0.0855651 | 0.6066467 | 9.456128 | 0 | 0 | 24.77902 |
| HALLMARK_G2M_CHECKPOINT | 0.1841436 | 0.3601000 | 9.176215 | 0 | 0 | 23.36961 |
| HALLMARK_INTERFERON_ALPHA_RESPONSE | 0.2056489 | 0.4099559 | 9.118630 | 0 | 0 | 23.08007 |
| HALLMARK_INTERFERON_GAMMA_RESPONSE | 0.1617153 | 0.3443235 | 9.095861 | 0 | 0 | 22.96564 |
| HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION | 0.1936996 | 0.3793006 | 9.033519 | 0 | 0 | 22.65245 |
gsva.es_temp <- gsva.es
gsva.es_temp <- as.data.frame(t(gsva.es_temp))
pheatmap(gsva.es[rownames(top_table_up_sig), ],
annotation_col = annotation_col,
clustering_distance_cols = "minkowski",
annotation_colors = ann_colors,
scale = "row",
cluster_rows = TRUE, cluster_cols = T)# iris.umap_learn <- umap::umap(t(gsva.es), method="umap-learn")
pca <- prcomp(t(gsva.es))
## make a scree plot
pca.var <- pca$sdev^2
pca.var.per <- round(pca.var/sum(pca.var)*100, 1)
pca.var.per## [1] 70.6 13.0 4.5 2.0 1.8 1.6 1.0 0.8 0.6 0.5 0.5 0.4 0.3 0.3 0.3
## [16] 0.2 0.2 0.2 0.2 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.0 0.0 0.0
## [31] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## [46] 0.0 0.0 0.0 0.0 0.0
group <- meta$`Histopathological.diagnostic:ch1`
group[group!="non-tumor"] <- "glioblastoma, grade 4"
group <- factor(group,
levels = c("non-tumor", "glioblastoma, grade 4"))
pcs <- data.frame(PC1 = pca$x[, 1],
PC2 = pca$x[, 2],
Group = group)
plot <- ggplot(pcs,
aes(x = PC1, y = PC2,
color = Group)) +
geom_point(size = 3) +
ggtitle("Pathway-based PCA plot of Healthy\nVs Tumour Samples") +
theme_bw() +
xlab(paste0("PC1: ", pca.var.per[1], "%")) +
ylab(paste0("PC2: ", pca.var.per[2], "%")) +
scale_color_manual(values = c("blue", "red")) +
theme_minimal() +
theme(aspect.ratio = 1)
plotPC1
# Create the dot plot
data <- pca$rotation[, 1]
data <- data.frame(Pathway = names(data),
Loading = as.numeric(data))
data <- data[order(abs(data$Loading), decreasing = T), ][1:25, ]
ggplot(data, aes(x = Loading, y = reorder(Pathway, Loading))) +
geom_point(size = 3, color = "red", alpha = 0.7) +
labs(title = "PCA Plot\nLoading scores of PC1",
x = "Loading scores",
y = "Pathway") +
theme_minimal()Pseudotime
best_features <- abs(pca$rotation[, 1])
best_features <- sort(best_features, decreasing = T)
best_features <- names(best_features[1:22])
all(rownames(gsva.es_temp) == meta$Row.names)## [1] TRUE
gsva.es_temp$Group <- meta$`Histopathological.diagnostic:ch1`
healthy_data <- subset(gsva.es_temp[, c(best_features[1:10], "Group")],
Group == "non-tumor")
healthy_data$Group <- NULL
robust_cov <- covMcd(healthy_data)
robust_mean <- robust_cov$center
robust_cov_matrix <- robust_cov$cov
mahalanobis_distances <- apply(gsva.es_temp[, best_features[1:10]],
1, function(row) {
mahalanobis(row,
robust_mean,
robust_cov_matrix)
})
gsva.es_temp$pt <- mahalanobis_distances
gsva.es_temp$Group <- factor(gsva.es_temp$Group,
levels = c("non-tumor",
"glioblastoma, grade 4"))
ggplot(gsva.es_temp,
aes(x = log(pt),
group = Group,
# color = Group,
fill = Group)) +
geom_density(alpha = 0.6, color = NA) +
ggtitle("Pathway progression towards grade 4 glioblastoma") +
xlab("log(Pseudotime)") +
scale_fill_manual(values = c("blue", "red"))+
theme_bw() +
theme_minimal()## [1] "HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION"
## [2] "HALLMARK_INTERFERON_ALPHA_RESPONSE"
## [3] "HALLMARK_ANGIOGENESIS"
## [4] "HALLMARK_E2F_TARGETS"
## [5] "HALLMARK_INTERFERON_GAMMA_RESPONSE"
## [6] "HALLMARK_G2M_CHECKPOINT"
## [7] "HALLMARK_IL6_JAK_STAT3_SIGNALING"
## [8] "HALLMARK_TNFA_SIGNALING_VIA_NFKB"
## [9] "HALLMARK_APOPTOSIS"
## [10] "HALLMARK_HYPOXIA"
ggplot(gsva.es_temp, aes(x = log(pt), y = HALLMARK_KRAS_SIGNALING_DN)) +
geom_point(size = 1, alpha = 0.7, aes(color = Group)) +
geom_smooth(method = "loess", se = F, color = "black") +
labs(title = "Pseudotime vs HALLMARK_KRAS_SIGNALING_DN\nwith LOESS (Outliers Included)",
x = "Pseudotime (Distance from Root Centroid)",
y = "HALLMARK_KRAS_SIGNALING_DN",
color = "Diabetic Status") +
scale_color_manual(values = c( "red", "blue"),
labels = c("Stage4 Glioblastoma", "non-tumour")) +
theme_minimal()# Create two matrices for pathway scores of the two groups
non_tumor_matrix <- gsva.es_temp[gsva.es_temp$Group == "non-tumor", best_features]
tumor_matrix <- gsva.es_temp[gsva.es_temp$Group == "glioblastoma, grade 4", best_features]
# Compute dot products for each pathway
dot_products <- sapply(best_features, function(pathway) {
non_tumor_vec <- non_tumor_matrix[, pathway]
tumor_vec <- tumor_matrix[, pathway]
# Ensure vectors have the same length
dot_product <- sum(non_tumor_vec * tumor_vec)
return(dot_product)
})
# Print dot products for each pathway
dot_products## HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION
## 7.481499
## HALLMARK_INTERFERON_ALPHA_RESPONSE
## 8.755495
## HALLMARK_ANGIOGENESIS
## 4.348181
## HALLMARK_E2F_TARGETS
## 11.159694
## HALLMARK_INTERFERON_GAMMA_RESPONSE
## 6.400510
## HALLMARK_G2M_CHECKPOINT
## 6.737549
## HALLMARK_IL6_JAK_STAT3_SIGNALING
## 1.864559
## HALLMARK_TNFA_SIGNALING_VIA_NFKB
## 9.589849
## HALLMARK_APOPTOSIS
## 11.187977
## HALLMARK_HYPOXIA
## 13.627528
## HALLMARK_INFLAMMATORY_RESPONSE
## 1.157551
## HALLMARK_MYC_TARGETS_V2
## 14.142401
## HALLMARK_COAGULATION
## 2.851030
## HALLMARK_MTORC1_SIGNALING
## 28.574042
## HALLMARK_GLYCOLYSIS
## 10.913865
## HALLMARK_PANCREAS_BETA_CELLS
## 5.918176
## HALLMARK_ALLOGRAFT_REJECTION
## 1.573030
## HALLMARK_DNA_REPAIR
## 26.025426
## HALLMARK_UNFOLDED_PROTEIN_RESPONSE
## 28.436408
## HALLMARK_NOTCH_SIGNALING
## 6.883375
## HALLMARK_TGF_BETA_SIGNALING
## 17.260667
## HALLMARK_IL2_STAT5_SIGNALING
## 5.691558
# Convert dot products to a dataframe for plotting
dot_product_df <- data.frame(
Pathway = names(dot_products),
DotProduct = dot_products
)
# Create the bar plot
ggplot(dot_product_df, aes(x = reorder(Pathway, DotProduct), y = DotProduct)) +
geom_bar(stat = "identity", fill = "skyblue") +
coord_flip() +
labs(title = "Dot Products between\nNon-tumor and Stage 4 Glioblastoma",
x = "Pathway",
y = "Dot Product") +
theme_minimal()oi <- c(best_features[1:10], "HALLMARK_KRAS_SIGNALING_DN")
d <- gather(gsva.es_temp,
key = "pathway",
value = "score",
oi)
library(ggplot2)
library(viridis)
# Calculate midpoint for labeling
label_positions <- d %>%
group_by(pathway) %>%
summarise(log_pt = median(log(pt)),
score = score[which.min(abs(log(pt) - median(log(pt))))])
# Define color palette for pathways and points
pathway_colors <- c("HALLMARK_ANGIOGENESIS" = "green",
"HALLMARK_APOPTOSIS" = "orange",
"HALLMARK_E2F_TARGETS" = "purple",
"HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION" = "brown",
"HALLMARK_G2M_CHECKPOINT" = "darkblue",
"HALLMARK_HYPOXIA" = "pink",
"HALLMARK_IL6_JAK_STAT3_SIGNALING" = "yellow",
"HALLMARK_INTERFERON_ALPHA_RESPONSE" = "darkgreen",
"HALLMARK_INTERFERON_GAMMA_RESPONSE" = "darkred",
"HALLMARK_TNFA_SIGNALING_VIA_NFKB" = "cyan",
"HALLMARK_KRAS_SIGNALING_DN" = "black")
group_colors <- c("non-tumor" = "blue", "glioblastoma, grade 4" = "red")
ggplot(d, aes(x = log(pt), y = score, group = pathway)) +
geom_smooth(aes(color = pathway), method = "loess", se = FALSE, size = 1) +
geom_point(aes(color = Group), size = 0.01) +
scale_color_manual(values = c(pathway_colors, group_colors)) +
# geom_text(data = label_positions, aes(x = log_pt, y = score, label = pathway),
# hjust = -0.2, size = 3, check_overlap = TRUE) + # Adjust hjust as needed
theme_minimal() +
guides(color = guide_legend(override.aes = list(size = 4))) + # Increase legend dot size
labs(title = "Pseudotime vs Multiple Pathways",
x = "log(Pseudotime)",
y = "Pathway Activity",
color = "Group/Pathway") +
theme(legend.position = "right")d$pathway <- gsub("HALLMARK_", "", d$pathway)
ggplot(d, aes(x = log(pt), y = score, color = Group)) +
geom_point(size = 2) +
geom_smooth(method = "loess", se = FALSE, color = "black") +
facet_wrap(~pathway, scales = "free_y") +
theme_minimal() +
guides(color = guide_legend(override.aes = list(size = 4))) + # Increase legend dot size
labs(title = "Pathway Activity Over Pseudotime",
x = "log(Pseudotime)",
y = "Pathway Activity") +
scale_color_manual(values = c("blue", "red"))gsva.es_temp$log_pt <- log(gsva.es_temp$pt)
library(corrr)
gsva.es_temp %>%
correlate(method = "spearman") %>%
shave() %>%
rplot(print_cor = T) +
theme(axis.text.x = element_text(angle = 45,
vjust = 1,
hjust = 1))Cell marker analysis
## species tissue_class tissue_type uberonongology_id cancer_type
## 1 Human Abdomen Abdominal fat pad <NA> Normal
## 2 Human Abdomen Abdominal fat pad <NA> Normal
## 3 Human Abdomen Abdominal fat pad <NA> Normal
## 4 Mouse Abdomen Muscle UBERON_0001630 Normal
## 5 Mouse Abdomen Muscle UBERON_0001630 Normal
## 6 Mouse Abdomen Muscle UBERON_0001630 Normal
## cell_type cell_name cellontology_id marker Symbol
## 1 Normal cell Brown adipocyte CL_0000449 FABP4 FABP4
## 2 Normal cell Brown adipocyte CL_0000449 PDGFRα <NA>
## 3 Normal cell Brown adipocyte CL_0000449 UCP1 UCP1
## 4 Normal cell Fibro-adipogenic progenitor cell <NA> Wisp1 Ccn4
## 5 Normal cell Myoblast CL_0000056 Myod1 Myod1
## 6 Normal cell Muscle satellite cell CL_0000514 Myf5 Myf5
## GeneID Genetype Genename UNIPROTID
## 1 2167 protein_coding fatty acid binding protein 4 E7DVW4
## 2 <NA> <NA> <NA> <NA>
## 3 7350 protein_coding uncoupling protein 1 P25874
## 4 22402 protein_coding cellular communication network factor 4 O54775
## 5 17927 protein_coding myogenic differentiation 1 P10085
## 6 17877 protein_coding myogenic factor 5 A2RSK4
## technology_seq marker_source PMID
## 1 sci-RNA-seq Experiment 32355218
## 2 sci-RNA-seq Experiment 32355218
## 3 sci-RNA-seq Experiment 32355218
## 4 10x Chromium Experiment 35439171
## 5 10x Chromium Experiment 35439171
## 6 10x Chromium Experiment 35439171
## Title
## 1 Single-cell transcriptional networks in differentiating preadipocytes suggest drivers associated with tissue heterogeneity
## 2 Single-cell transcriptional networks in differentiating preadipocytes suggest drivers associated with tissue heterogeneity
## 3 Single-cell transcriptional networks in differentiating preadipocytes suggest drivers associated with tissue heterogeneity
## 4 An estrogen-sensitive fibroblast population drives abdominal muscle fibrosis in an inguinal hernia mouse model
## 5 An estrogen-sensitive fibroblast population drives abdominal muscle fibrosis in an inguinal hernia mouse model
## 6 An estrogen-sensitive fibroblast population drives abdominal muscle fibrosis in an inguinal hernia mouse model
## journal year
## 1 Nature communications 2020
## 2 Nature communications 2020
## 3 Nature communications 2020
## 4 JCI insight 2022
## 5 JCI insight 2022
## 6 JCI insight 2022
markers_sub <- dplyr::filter(markers,# Brain
species == "Human" & tissue_class=="Brain")
markers_sub$marker <- toupper(markers_sub$marker)
celltype <- unique(markers_sub$cell_name)
cell_markers <- list()
for (cell in celltype) {
# m_df <- read_excel(paste0("data/", m))
cell_name <- markers_sub[markers_sub$cell_name == cell, ]$cell_name
cell_name <- unique(gsub(" ", "_", cell_name))
cell_markers_list <- markers_sub[markers_sub$cell_name == cell, ]$marker
cell_markers[[cell_name]] <- na.omit(cell_markers_list)
}
filtered_list <- Filter(function(x) length(x) >= 5, cell_markers)
gsvaPar_cell_markers <- ssgseaParam(t(exprs_data_with_genes), filtered_list)
gsvaPar_cell_markers.es <- gsva(gsvaPar_cell_markers, verbose=FALSE)## [1] "Calculating ranks..."
## [1] "Calculating absolute values from ranks..."
## [1] "Normalizing..."
## [1] 57 100
gsvaPar_cell_markers.es <- as.data.frame(t(gsvaPar_cell_markers.es))
all(rownames(gsvaPar_cell_markers.es) == meta$Row.names)## [1] TRUE
groups <- factor(meta$`Histopathological.diagnostic:ch1`)
Groups_df <- data.frame(Groups = groups)
pheatmap(t(gsvaPar_cell_markers.es[, -(58:60)]),
annotation_col = annotation_col,
annotation_colors = ann_colors,
scale = "row",
cluster_rows = TRUE, cluster_cols = TRUE)groups <- factor(group)
Groups_df <- data.frame(Groups = groups)
Groups_df$Groups <- as.character(Groups_df$Groups)
Groups_df$Groups[Groups_df$Groups=="non-tumor"] <- "normal"
Groups_df$Groups[Groups_df$Groups=="glioblastoma, grade 4"] <- "grade4_glioblastoma"
groups <- Groups_df$Groups
groups <- factor(groups,
levels = c("normal", "grade4_glioblastoma"))
model_mat <- model.matrix(~ 0 + groups)
colnames(model_mat) <- levels(groups)
fit <- lmFit(t(gsvaPar_cell_markers.es[, -c(58:60)]),
model_mat)
contrast_matrix <- makeContrasts(tumour_vs_normal = grade4_glioblastoma - normal,
levels = model_mat)
fit2 <- contrasts.fit(fit,
contrast_matrix)
fit2 <- eBayes(fit2)
top_table <- topTable(fit2, adjust.method = "fdr", number = Inf)
top_table_up_sig_cell_markers <- top_table[top_table$adj.P.Val < 0.001, ]
dim(top_table_up_sig_cell_markers)## [1] 27 6
top_table_up_sig_cell_markers %>%
head(10) %>%
knitr::kable(caption="Top 10 differential enrichment of cell signatures",
"pipe")| logFC | AveExpr | t | P.Value | adj.P.Val | B | |
|---|---|---|---|---|---|---|
| Deep_layer_neuron | -0.2026764 | -0.1645302 | -12.425196 | 0 | 0 | 39.51158 |
| Cancer_stem_cell | 0.1762225 | 0.2558394 | 10.965089 | 0 | 0 | 32.25738 |
| Inhibitory_neuron | -0.2562831 | 0.0577846 | -10.409146 | 0 | 0 | 29.46527 |
| Monocyte | 0.2031978 | 0.0797341 | 9.160911 | 0 | 0 | 23.18587 |
| Non-neuron | 0.0858517 | 0.2218806 | 9.021946 | 0 | 0 | 22.48919 |
| CD4+_T_cell | 0.0545165 | 0.1637132 | 8.883070 | 0 | 0 | 21.79404 |
| Cancer_cell | 0.0459180 | 0.2332291 | 8.880636 | 0 | 0 | 21.78186 |
| Neural_progenitor_cell | 0.1467002 | 0.1828592 | 8.796886 | 0 | 0 | 21.36326 |
| Neuron | -0.1360409 | 0.1312496 | -8.621252 | 0 | 0 | 20.48708 |
| Tissue-resident_macrophage | 0.1688809 | 0.2758632 | 8.189982 | 0 | 0 | 18.34749 |
pheatmap(t(gsvaPar_cell_markers.es[, rownames(top_table_up_sig_cell_markers)]),
annotation_col = annotation_col,
annotation_colors = ann_colors,
scale = "row",
main = "Cell signatures",
cluster_rows = TRUE, cluster_cols = TRUE)# iris.umap_learn <- umap::umap(t(gsva.es), method="umap-learn")
pca <- prcomp(gsvaPar_cell_markers.es)
## make a scree plot
pca.var <- pca$sdev^2
pca.var.per <- round(pca.var/sum(pca.var)*100, 1)
pca.var.per## [1] 55.4 11.9 8.7 4.7 3.7 2.4 1.9 1.7 1.2 1.1 0.8 0.8 0.6 0.6 0.5
## [16] 0.4 0.4 0.3 0.3 0.2 0.2 0.2 0.2 0.2 0.2 0.1 0.1 0.1 0.1 0.1
## [31] 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## [46] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
pcs <- data.frame(PC1 = pca$x[, 1],
PC2 = pca$x[, 2],
Group = group)
pcs$Group <- factor(pcs$Group,
levels = c("non-tumor", "glioblastoma, grade 4"))
plot <- ggplot(pcs,
aes(x = PC1, y = PC2,
color = Group)) +
geom_point(size = 3) +
ggtitle("Cell signature-based PCA plot of Healthy\nVs Tumour Samples") +
theme_bw() +
xlab(paste0("PC1: ", pca.var.per[1], "%")) +
ylab(paste0("PC2: ", pca.var.per[2], "%")) +
theme_minimal() +
scale_color_manual(values = c( "blue", "red")) +
theme(aspect.ratio = 1)
plotPseudotime calculation
library(robustbase)
best_features <- abs(pca$rotation[, 1])
best_features <- sort(best_features, decreasing = T)
best_features <- names(best_features)[1:10]
all(rownames(gsvaPar_cell_markers.es) == meta$Row.names) # T## [1] TRUE
gsvaPar_cell_markers.es$Group <- meta$`Histopathological.diagnostic:ch1`
healthy_data <- subset(gsvaPar_cell_markers.es[, c(best_features, "Group")],
Group == "non-tumor")
healthy_data$Group <- NULL
robust_cov <- covMcd(healthy_data)
robust_mean <- robust_cov$center
robust_cov_matrix <- robust_cov$cov
mahalanobis_distances <- apply(gsvaPar_cell_markers.es[, best_features],
1, function(row) {
mahalanobis(row,
robust_mean,
robust_cov_matrix)
})
gsvaPar_cell_markers.es$pt <- mahalanobis_distancesggplot(gsvaPar_cell_markers.es,
aes(x = log(pt),
group = Group,
color = Group,
fill = Group)) +
geom_density(alpha = 0.6) +
ggtitle("Pathway progression to Glioblastoma using Cell signatures") +
xlab("log Pseudotime") +
theme_bw() +
theme_minimal()ggplot(gsvaPar_cell_markers.es, aes(x = log(pt), y = Fibroblast)) +
geom_point(size = 1, alpha = 0.7, aes(color = Group)) +
geom_smooth(method = "loess", se = F, color = "black") +
labs(title = "Pseudotime vs Fibroblast with LOESS (Outliers Included)",
x = "Pseudotime (Distance from Root Centroid)",
y = "Fibroblast",
color = "Diabetic Status") +
scale_color_manual(values = c( "red", "blue"),
labels = c("Stage4 Glioblastoma", "non-tumour")) +
theme_minimal()# Define the cell types you're interested in
cell_types <- names(pca$rotation[, 1]) # Add more cell types as needed
cell_types <- gsub("\\(.*?\\)", "", cell_types)
cell_types <- gsub("\\+", "", cell_types)
cell_types <- gsub("\\-", "", cell_types)
colnames(gsvaPar_cell_markers.es) <- gsub("\\(.*?\\)",
"", colnames(gsvaPar_cell_markers.es))
colnames(gsvaPar_cell_markers.es) <- gsub("\\+",
"", colnames(gsvaPar_cell_markers.es))
colnames(gsvaPar_cell_markers.es) <- gsub("\\-",
"", colnames(gsvaPar_cell_markers.es))
# Loop through each cell type and generate the corresponding plot
for (cell_type in cell_types) {
# Generate the plot for the current cell type
p <- ggplot(gsvaPar_cell_markers.es, aes_string(x = "log(pt)",
y = cell_type)) +
geom_point(size = 1, alpha = 0.7, aes(color = Group)) +
geom_smooth(method = "loess", se = FALSE, color = "black") +
labs(title = paste("Pseudotime vs", cell_type, "with LOESS (Outliers Included)"),
x = "Pseudotime (Distance from Root Centroid)",
y = cell_type,
color = "Diabetic Status") +
scale_color_manual(values = c("red", "blue"),
labels = c("Stage4 Glioblastoma", "non-tumour")) +
theme_minimal()
# Print the plot
print(p)
}gsvaPar_cell_markers.es$log_pt <- log(gsvaPar_cell_markers.es$pt)
gsvaPar_cell_markers.es %>%
correlate(method = "spearman") %>%
shave() %>%
rplot(print_cor = T) +
theme(axis.text.x = element_text(angle = 45,
vjust = 1,
hjust = 1))Cell-type decomposition
cell_abundance <- meta
corrected_bulk <- exprs_data_with_genes
# Scale the corrected bulk data
bulk_scaled <- t(corrected_bulk)
# Pathway list (replace with your filtered list of pathways)
pways <- names(filtered_list)
pways <- gsub("\\(.*?\\)", "", pways)
pways <- gsub("\\+", "", pways)
pways <- gsub("\\-", "", pways)
names(filtered_list) <- pways
for (pway in pways) {
marker_genes <- filtered_list[[pway]] # Extract marker genes for the pathway
gsva_score <- gsvaPar_cell_markers.es[, pway] # GSVA score for the pathway
# Subset the bulk expression data to include only marker genes
bulk_markers <- bulk_scaled[rownames(bulk_scaled) %in% marker_genes, ]
# Calculate the variance of each marker gene across samples
gene_variances <- apply(bulk_markers, 1, var)
gene_variances <- sort(gene_variances, decreasing = TRUE)
# Use all high variance genes (or subset if needed)
high_variance_genes <- names(gene_variances)
# Subset bulk data using high-variance marker genes
bulk_high_variance <- bulk_markers[high_variance_genes, ]
# Create a diagonal weight matrix using the variances of the high-variance marker genes
W <- diag(gene_variances[high_variance_genes])
# Multiply the bulk expression matrix by the variances (element-wise multiplication)
XW <- t(t(bulk_high_variance) * gene_variances[high_variance_genes])
# Perform PCA on the weighted expression matrix
pca_result <- prcomp(t(XW), center = TRUE, scale. = FALSE)
# Create a data frame for PCA results
x_df <- as.data.frame(pca_result$x)
# Identify the component most correlated with the GSVA score
ind <- which.max(sapply(x_df, function(x) cor(x, gsva_score, method = "spearman")))
# Extract the most correlated principal component
first_PC <- pca_result$x[, ind]
# Normalize PC1 values to [0, 1] to interpret as proportions
normalized_proportion <- (first_PC - min(first_PC)) / (max(first_PC) - min(first_PC))
# Plot the barplot for the estimated proportions
# barplot(normalized_proportion, main = paste0("Estimated Proportion of ", pway),
# col = rainbow(length(normalized_proportion)), xlab = "Samples", ylab = #
#"Proportion")
# Add normalized proportions to the cell_abundance data for the current pathway
cell_abundance[, pway] <- as.numeric(normalized_proportion)
# Ensure consistent group levels
names(cell_abundance)[names(cell_abundance) == "Histopathological.diagnostic:ch1"] <- "Group"
cell_abundance$Group <- factor(cell_abundance$Group,
levels = c("non-tumor", "glioblastoma, grade 4"))
# Plot boxplot for the pathway across the groups
p <- ggplot(cell_abundance,
aes(x = Group,
fill = Group,
y = .data[[pway]])) + # Use .data[[pway]] to reference column dynamically
ggtitle(pway) +
geom_boxplot(outlier.shape = NA) +
theme_bw() +
theme_minimal() +
scale_fill_manual(values = c("blue", "red"))+
xlab("Group") +
ylab(pway) +
theme(aspect.ratio = 1)
print(p)
}## [1] TRUE
cell_abundance$lop_pt <- log(gsva.es_temp$pt)
cell_abundance %>%
correlate(method = "spearman") %>%
shave() %>%
rplot(print_cor = T) +
theme(axis.text.x = element_text(angle = 45,
vjust = 1,
hjust = 1))ggplot(gsvaPar_cell_markers.es,
aes(x = log(pt),
group = Group,
color = Group,
fill = Group)) +
geom_density(alpha = 0.6) +
ggtitle("Pathway progression to Glioblastoma using Cell signatures") +
xlab("log Pseudotime") +
theme_bw() +
theme_minimal()# cell_abundance <- cell_abundance[order(cell_abundance$lop_pt), ]
best_cells <- abs(pca$rotation[, 1])
best_cells <- sort(best_cells, decreasing = T)
best_cells <- names(best_cells)
best_cells <- gsub("\\(.*?\\)", "", best_cells)
best_cells <- gsub("\\+", "", best_cells)
best_cells <- gsub("\\-", "", best_cells)
cell_names <- c("Tissueresident_macrophage", "Cytotoxic_T_cell", "CD8_T_cell", "Regulatory_T_cell", "Infiltrating_macrophage",
"Neuron", "Deep_layer_neuron", "Intermediate_progenitor_cell", "Inhibitory_neuron", "Oligodendrocyte")
pheatmap(t(cell_abundance[, cell_names[1:4]]),
annotation_col = annotation_col,
main = "Cell abundance",
annotation_colors = ann_colors)Visualising cell abuundance over time
groups <- factor(group)
Groups_df <- data.frame(Groups = groups)
Groups_df$Groups <- as.character(Groups_df$Groups)
Groups_df$Groups[Groups_df$Groups=="non-tumor"] <- "normal"
Groups_df$Groups[Groups_df$Groups=="glioblastoma, grade 4"] <- "grade4_glioblastoma"
groups <- Groups_df$Groups
groups <- factor(groups,
levels = c("normal", "grade4_glioblastoma"))
model_mat <- model.matrix(~ 0 + groups)
colnames(model_mat) <- levels(groups)
cell_abundance_use <- cell_abundance[, 4:60]
fit <- lmFit(t(cell_abundance_use),
model_mat)
contrast_matrix <- makeContrasts(tumour_vs_normal = grade4_glioblastoma - normal,
levels = model_mat)
fit2 <- contrasts.fit(fit,
contrast_matrix)
fit2 <- eBayes(fit2)
top_table <- topTable(fit2, adjust.method = "fdr", number = Inf)
top_table_up_sig_cell_abundance <- top_table[top_table$adj.P.Val < 0.001, ]
dim(top_table_up_sig_cell_abundance)## [1] 18 6
top_table_up_sig_cell_abundance %>%
head(10) %>%
knitr::kable(caption = "Changes in cell abundance in non-tumor vs tumor samples",
"pipe")| logFC | AveExpr | t | P.Value | adj.P.Val | B | |
|---|---|---|---|---|---|---|
| Deep_layer_neuron | -0.3592237 | 0.3798883 | -8.878146 | 0e+00 | 0.0e+00 | 22.104225 |
| Neuron | -0.4864960 | 0.4764828 | -7.957835 | 0e+00 | 0.0e+00 | 17.530925 |
| Myeloid_cell | 0.1950043 | 0.5958006 | 7.466979 | 0e+00 | 0.0e+00 | 15.136503 |
| Glial_cell | -0.2330704 | 0.5613311 | -6.382376 | 0e+00 | 1.0e-07 | 10.023023 |
| Intermediate_progenitor_cell | -0.1970485 | 0.3683879 | -5.971008 | 0e+00 | 4.0e-07 | 8.169587 |
| Interneuron | -0.1743731 | 0.5680965 | -5.827110 | 1e-07 | 6.0e-07 | 7.535014 |
| Stem_cell | 0.1687167 | 0.5252583 | 5.737290 | 1e-07 | 8.0e-07 | 7.142829 |
| Oligodendrocyte | -0.2766881 | 0.4246346 | -5.665859 | 1e-07 | 1.0e-06 | 6.833153 |
| Radial_glial_cell | 0.2063688 | 0.6577391 | 5.509277 | 3e-07 | 1.6e-06 | 6.161448 |
| Cancer_cell | 0.1690079 | 0.5978796 | 5.504025 | 3e-07 | 1.6e-06 | 6.139093 |
cells_to_plot <- c(rownames(top_table_up_sig_cell_abundance))
pheatmap(t(cell_abundance[,cells_to_plot]),
annotation_col = annotation_col,
main = "Cell abundance",
annotation_colors = ann_colors)## [1] TRUE
cell_abundance$log_pt <- gsvaPar_cell_markers.es$log_pt
for (sig in sort(best_cells)) {
p <- ggplot(cell_abundance, aes(x = log_pt, y = .data[[sig]])) +
geom_point(aes(color = Group)) +
geom_smooth(method = "loess", se = F, color="black") + # Optionally add a smoothed line
labs(title = paste0(sig, " Abundance Along Pseudotime"),
x = "log(Pseudotime)",
y = sig) +
scale_color_manual(values = c("blue", "red"))+
guides(color = guide_legend(override.aes = list(size = 4))) +
theme_minimal()
print(p)
}Visualise cell abundances over time for cells of interest
cells_oi <- c("Cancer_cell","Deep_layer_neuron",
"Effector_memory_CD4_T_cell", "Naive_CD8_T_cell",
"CD4_T_cell", "Monocyte", "Cytotoxic_T_cell","Tissueresident_macrophage",
"Astrocyte", "B_cell", "Lymphocyte", "Regulatory_T_cell", "T_cell")
cells_df <- gather(cell_abundance,
key = "celltype",
value = "Abundance",
cells_oi)
cells_df$Group <- factor(cells_df$Group,
levels = c("non-tumor", "glioblastoma, grade 4"))
ggplot(cells_df, aes(x = log_pt,
y = Abundance, color = Group)) +
geom_point(size = 0.5) +
geom_smooth(method = "loess", se = FALSE, color = "black") +
facet_wrap(~celltype, scales = "free_y") +
theme_minimal() +
guides(color = guide_legend(override.aes = list(size = 4))) + # Increase legend dot size
labs(title = "Cell abundance over pseudotime",
x = "log(Pseudotime)",
y = "Cell abundance") +
scale_color_manual(values = c("blue", "red"))Visualise cell activities over time
cells_df <- gather(gsvaPar_cell_markers.es,
key = "celltype",
value = "Activity",
cells_oi)
cells_df$Group <- factor(cells_df$Group,
levels = c("non-tumor", "glioblastoma, grade 4"))
ggplot(cells_df, aes(x = log_pt, y = Activity, color = Group)) +
geom_point(size = 0.5) +
geom_smooth(method = "loess", se = FALSE, color = "black") +
facet_wrap(~celltype, scales = "free_y") +
theme_minimal() +
guides(color = guide_legend(override.aes = list(size = 4))) + # Increase legend dot size
labs(title = "Cell activity over pseudotime",
x = "log(Pseudotime)",
y = "Cell activity") +
scale_color_manual(values = c("blue", "red"))# Define your cell types of interest here
cell_type_1 <- "Cytotoxic_T_cell" # Example: Cytotoxic T cells
cell_type_2 <- "Fibroblast" # Example: Fibroblasts
# Sort data by pseudotime (log_pt)
gsvaPar_cell_markers.es <- gsvaPar_cell_markers.es[order(gsvaPar_cell_markers.es$log_pt), ]
# Subset the data for non-tumor and tumor groups
non_tumor <- subset(gsvaPar_cell_markers.es, Group == "non-tumor")
non_tumor <- non_tumor[order(non_tumor$log_pt), ]
tumor <- subset(gsvaPar_cell_markers.es, Group == "glioblastoma, grade 4")
tumor <- tumor[order(tumor$log_pt), ]
# Perform convolution for tumor and non-tumor data with the selected cell types
conv_tumor <- convolve(tumor[[cell_type_1]], rev(tumor[[cell_type_2]]), type = "open")
conv_non_tumor <- convolve(non_tumor[[cell_type_1]], rev(non_tumor[[cell_type_2]]), type = "open")
# Find peaks for tumor and non-tumor data
peak_tumor <- which.max(conv_tumor)
peak_non_tumor <- which.max(conv_non_tumor)
# Retrieve pseudotime for these peaks
tumor_peak_pseudotime <- tumor$log_pt[peak_tumor]
non_tumor_peak_pseudotime <- non_tumor$log_pt[peak_non_tumor]
# Print peak pseudotime results
print(paste("Tumor peak occurs at pseudotime:", tumor_peak_pseudotime))## [1] "Tumor peak occurs at pseudotime: 2.60863020804817"
## [1] "Non-tumor peak occurs at pseudotime: 0.932774083761205"
# Pad conv_non_tumor with zeros to match the length of conv_tumor
conv_non_tumor_padded <- c(conv_non_tumor, rep(0, length(conv_tumor) - length(conv_non_tumor)))
# Combine results into a data frame
comparison_data <- data.frame(
Index = 1:length(conv_tumor),
tumor = conv_tumor,
non_tumor = conv_non_tumor_padded
)
# Print to verify
print(comparison_data)## Index tumor non_tumor
## 1 1 0.00592342 0.002067378
## 2 2 -0.08524187 0.011592281
## 3 3 -0.02558490 0.019096839
## 4 4 -0.06154408 0.025639046
## 5 5 -0.06908373 0.014769823
## 6 6 -0.06450753 0.002086915
## 7 7 -0.12072761 -0.011454689
## 8 8 -0.12613254 -0.022242082
## 9 9 -0.16639701 -0.026141320
## 10 10 -0.10415793 -0.031233044
## 11 11 -0.19331134 -0.025423023
## 12 12 -0.21737332 -0.015061214
## 13 13 -0.17467109 -0.054291684
## 14 14 -0.24478680 -0.114269312
## 15 15 -0.25964452 -0.129460645
## 16 16 -0.30548873 -0.116787028
## 17 17 -0.37806693 -0.102998076
## 18 18 -0.43365051 -0.110913489
## 19 19 -0.48516861 -0.128843910
## 20 20 -0.52520987 -0.110513850
## 21 21 -0.57309182 -0.125146806
## 22 22 -0.61656401 -0.160079066
## 23 23 -0.63448213 -0.137631948
## 24 24 -0.72355744 -0.144750017
## 25 25 -0.78180533 -0.142370302
## 26 26 -0.87950854 -0.148282850
## 27 27 -0.90639724 -0.153612426
## 28 28 -0.92391042 -0.153154697
## 29 29 -1.08237541 -0.151212845
## 30 30 -1.02748315 -0.133996308
## 31 31 -1.15243768 -0.107086876
## 32 32 -1.10156070 -0.106920583
## 33 33 -1.17123531 -0.113948170
## 34 34 -1.19839591 -0.116067851
## 35 35 -1.26242492 -0.129217272
## 36 36 -1.40125400 -0.080701829
## 37 37 -1.35059424 -0.021131364
## 38 38 -1.48679877 -0.029584338
## 39 39 -1.51144414 -0.029312420
## 40 40 -1.57329052 -0.043692890
## 41 41 -1.58915767 -0.025924590
## 42 42 -1.70508213 -0.016158879
## 43 43 -1.74430318 -0.019995109
## 44 44 -1.78747553 -0.004204721
## 45 45 -1.83574393 0.001379107
## 46 46 -1.86433301 0.000000000
## 47 47 -1.96827813 0.000000000
## 48 48 -1.96755059 0.000000000
## 49 49 -2.02472353 0.000000000
## 50 50 -2.04807563 0.000000000
## 51 51 -2.20391808 0.000000000
## 52 52 -2.25358630 0.000000000
## 53 53 -2.22188095 0.000000000
## 54 54 -2.29020098 0.000000000
## 55 55 -2.38348495 0.000000000
## 56 56 -2.42428057 0.000000000
## 57 57 -2.49610121 0.000000000
## 58 58 -2.52423991 0.000000000
## 59 59 -2.47522829 0.000000000
## 60 60 -2.58137790 0.000000000
## 61 61 -2.65618690 0.000000000
## 62 62 -2.68838372 0.000000000
## 63 63 -2.68148989 0.000000000
## 64 64 -2.67381839 0.000000000
## 65 65 -2.79850790 0.000000000
## 66 66 -2.76898956 0.000000000
## 67 67 -2.79785440 0.000000000
## 68 68 -2.88583156 0.000000000
## 69 69 -2.88618636 0.000000000
## 70 70 -2.84514917 0.000000000
## 71 71 -2.99913081 0.000000000
## 72 72 -3.09322580 0.000000000
## 73 73 -3.01372547 0.000000000
## 74 74 -3.10634154 0.000000000
## 75 75 -3.09678618 0.000000000
## 76 76 -3.15869484 0.000000000
## 77 77 -3.13541723 0.000000000
## 78 78 -3.20915283 0.000000000
## 79 79 -3.10566823 0.000000000
## 80 80 -3.12692815 0.000000000
## 81 81 -3.22349149 0.000000000
## 82 82 -3.22903040 0.000000000
## 83 83 -3.13409163 0.000000000
## 84 84 -3.02959960 0.000000000
## 85 85 -2.98944898 0.000000000
## 86 86 -2.98115043 0.000000000
## 87 87 -2.98471909 0.000000000
## 88 88 -2.94182711 0.000000000
## 89 89 -2.93217311 0.000000000
## 90 90 -2.87744783 0.000000000
## 91 91 -2.80151838 0.000000000
## 92 92 -2.85996336 0.000000000
## 93 93 -2.77105720 0.000000000
## 94 94 -2.66994673 0.000000000
## 95 95 -2.63152993 0.000000000
## 96 96 -2.65448353 0.000000000
## 97 97 -2.62074572 0.000000000
## 98 98 -2.54728553 0.000000000
## 99 99 -2.46297238 0.000000000
## 100 100 -2.39524908 0.000000000
## 101 101 -2.36591508 0.000000000
## 102 102 -2.25807004 0.000000000
## 103 103 -2.29562853 0.000000000
## 104 104 -2.20045459 0.000000000
## 105 105 -2.17864597 0.000000000
## 106 106 -2.09039793 0.000000000
## 107 107 -2.09547133 0.000000000
## 108 108 -2.09591112 0.000000000
## 109 109 -1.98059415 0.000000000
## 110 110 -1.89594477 0.000000000
## 111 111 -1.92034841 0.000000000
## 112 112 -1.83655201 0.000000000
## 113 113 -1.75949102 0.000000000
## 114 114 -1.75094833 0.000000000
## 115 115 -1.62291204 0.000000000
## 116 116 -1.54742129 0.000000000
## 117 117 -1.52697677 0.000000000
## 118 118 -1.51890119 0.000000000
## 119 119 -1.43294804 0.000000000
## 120 120 -1.35419161 0.000000000
## 121 121 -1.32221427 0.000000000
## 122 122 -1.32470250 0.000000000
## 123 123 -1.20369993 0.000000000
## 124 124 -1.18265815 0.000000000
## 125 125 -1.19452538 0.000000000
## 126 126 -1.17483169 0.000000000
## 127 127 -1.10820125 0.000000000
## 128 128 -1.09181093 0.000000000
## 129 129 -1.02334051 0.000000000
## 130 130 -0.98954762 0.000000000
## 131 131 -0.93822139 0.000000000
## 132 132 -0.90597136 0.000000000
## 133 133 -0.85433839 0.000000000
## 134 134 -0.84297255 0.000000000
## 135 135 -0.79739932 0.000000000
## 136 136 -0.73349518 0.000000000
## 137 137 -0.69836566 0.000000000
## 138 138 -0.63660009 0.000000000
## 139 139 -0.59499438 0.000000000
## 140 140 -0.55284303 0.000000000
## 141 141 -0.54111413 0.000000000
## 142 142 -0.47546985 0.000000000
## 143 143 -0.44795230 0.000000000
## 144 144 -0.37450956 0.000000000
## 145 145 -0.34360599 0.000000000
## 146 146 -0.29605262 0.000000000
## 147 147 -0.28813879 0.000000000
## 148 148 -0.23599335 0.000000000
## 149 149 -0.21337879 0.000000000
## 150 150 -0.15950793 0.000000000
## 151 151 -0.12667616 0.000000000
## 152 152 -0.08443444 0.000000000
## 153 153 -0.04001786 0.000000000
# Plot convolution results for tumor vs. non_tumor data and annotate with pseudotime
ggplot(comparison_data, aes(x = Index)) +
geom_line(aes(y = tumor, color = "tumor"), size = 1.2) +
geom_line(aes(y = non_tumor, color = "non_tumor"), size = 1.2, linetype = "dashed") +
# Add text annotation for the pseudotime at the tumor peak
geom_text(aes(x = peak_tumor, y = conv_tumor[peak_tumor],
label = paste0("Peak at Pseudotime: ", round(tumor_peak_pseudotime, 2))),
color = "red", vjust = -1.5, size = 4) +
# Add text annotation for the pseudotime at the non-tumor peak
geom_text(aes(x = peak_non_tumor, y = conv_non_tumor_padded[peak_non_tumor],
label = paste0("Peak at Pseudotime: ", round(non_tumor_peak_pseudotime, 2))),
color = "blue", vjust = -1.5, size = 4) +
labs(
title = paste("Convolution Results:", cell_type_1, "vs", cell_type_2, "in Tumor vs Non-tumor"),
subtitle = paste(cell_type_1, "and", cell_type_2),
y = paste("Convolution Output (Interaction between", cell_type_1, "and", cell_type_2, ")"),
x = "Pseudotime (Ordered by log_pt)"
) +
scale_color_manual(values = c("tumor" = "red", "non_tumor" = "blue")) +
theme_minimal()Which pathways have strong interaction with the cytotoxic t cell signaure
gsva.es_temp_new <- gsva.es_temp
colnames(gsva.es_temp_new) <- gsub("HALLMARK_", "", colnames(gsva.es_temp_new))
# List of all cell types (excluding Cytotoxic T cells)
cell_types <- colnames(gsva.es_temp_new)[1:50] # Add your full list here
cell_type_1 <- "Regulatory_T_cell" # "Cytotoxic_T_cell" # Cytotoxic T cells are the fixed cell type
gsva.es_temp_new <- gsva.es_temp_new[rownames(gsvaPar_cell_markers.es), ]
all(rownames(gsva.es_temp_new) == rownames(gsvaPar_cell_markers.es))## [1] TRUE
# Ensure proper alignment between gsva.es_temp_new and gsvaPar_cell_markers.es
gsva.es_temp_new[, cell_type_1] <- gsvaPar_cell_markers.es[, cell_type_1]
gsva.es_temp_new %>%
correlate(method = "spearman") %>%
shave() %>%
rplot(print_cor = T) +
theme(axis.text.x = element_text(angle = 45,
vjust = 1,
hjust = 1))gsva.es_temp_new[, 1:50] <- scale(gsva.es_temp_new[, 1:50])
gsva.es_temp_new$log_pt <- log(gsva.es_temp_new$pt)
# Initialize a data frame to store the results for each cell type
results <- data.frame(Cell_Type = character(), Peak_Pseudotime = numeric(), Peak_Broadness = numeric())
# Loop through each cell type and perform the convolution analysis
for (cell_type_2 in cell_types) {
# Sort data by pseudotime (log_pt)
gsva.es_temp_new <- gsva.es_temp_new[order(gsva.es_temp_new$log_pt), ]
# Subset the data for non-tumor and tumor groups
non_tumor <- subset(gsva.es_temp_new, Group == "non-tumor")
# non_tumor[, 1:50] <- scale(non_tumor[, 1:50])
non_tumor <- non_tumor[order(non_tumor$log_pt), ]
tumor <- subset(gsva.es_temp_new, Group == "glioblastoma, grade 4")
# tumor[, 1:50] <- scale(tumor[, 1:50])
tumor <- tumor[order(tumor$log_pt), ]
# Perform convolution for tumor and non-tumor data with the selected cell types
conv_tumor <- convolve(tumor[[cell_type_1]], rev(tumor[[cell_type_2]]), type = "open")
conv_non_tumor <- convolve(non_tumor[[cell_type_1]], rev(non_tumor[[cell_type_2]]), type = "open")
# Find peaks for tumor and non-tumor data
if (!all(is.na(conv_tumor)) && length(unique(conv_tumor)) > 1) {
peak_tumor <- which.max(conv_tumor)
tumor_peak_pseudotime <- tumor$log_pt[peak_tumor]
# Calculate peak broadness for tumor
peak_value <- conv_tumor[peak_tumor]
half_max <- peak_value / 2 # Half maximum
# Find where convolution value is closest to half_max on both sides of the peak
left_index <- max(which(conv_tumor[1:peak_tumor] <= half_max), na.rm = TRUE)
right_index <- min(which(conv_tumor[peak_tumor:length(conv_tumor)] <= half_max), na.rm = TRUE) + peak_tumor - 1
tumor_peak_broadness <- right_index - left_index
} else {
tumor_peak_pseudotime <- NA
tumor_peak_broadness <- NA
}
if (!all(is.na(conv_non_tumor)) && length(unique(conv_non_tumor)) > 1) {
peak_non_tumor <- which.max(conv_non_tumor)
non_tumor_peak_pseudotime <- non_tumor$log_pt[peak_non_tumor]
} else {
non_tumor_peak_pseudotime <- NA
}
# Append the results to the data frame
results <- rbind(results, data.frame(
Cell_Type = cell_type_2,
Peak_Pseudotime = tumor_peak_pseudotime,
Peak_Broadness = tumor_peak_broadness
))
# Pad conv_non_tumor with zeros to match the length of conv_tumor
conv_non_tumor_padded <- c(conv_non_tumor, rep(0, length(conv_tumor) - length(conv_non_tumor)))
# Combine results into a data frame for plotting
comparison_data <- data.frame(
Index = 1:length(conv_tumor),
tumor = conv_tumor,
non_tumor = conv_non_tumor_padded
)
# Create the base plot
p <- ggplot(comparison_data, aes(x = Index)) +
geom_line(aes(y = tumor, color = "tumor"), size = 1.2) +
geom_line(aes(y = non_tumor, color = "non_tumor"), size = 1.2, linetype = "dashed") +
labs(
title = paste("Convolution Results:", cell_type_1, "vs", cell_type_2, "\nin Tumor vs Non-tumor"),
subtitle = paste(cell_type_1, "and", cell_type_2),
y = paste("Convolution Output (Interaction between", cell_type_1, "\nand", cell_type_2, ")"),
x = "Pseudotime (Ordered by log_pt)"
) +
scale_color_manual(values = c("tumor" = "red", "non_tumor" = "blue")) +
theme_minimal()
print(p)
# Conditionally add text annotation for the tumor peak (if valid)
if (!is.na(tumor_peak_pseudotime)) {
p <- p + annotate("text", x = peak_tumor, y = conv_tumor[peak_tumor],
label = paste0("Peak at Pseudotime: ", round(tumor_peak_pseudotime, 2)),
color = "red", vjust = -1.5, size = 4)
}
# Conditionally add text annotation for the non-tumor peak (if valid)
if (!is.na(non_tumor_peak_pseudotime)) {
p <- p + annotate("text", x = peak_non_tumor, y = conv_non_tumor_padded[peak_non_tumor],
label = paste0("Peak at Pseudotime: ", round(non_tumor_peak_pseudotime, 2)),
color = "blue", vjust = -1.5, size = 4)
}
# Print and save the plot for each cell type
print(p)
ggsave(filename = paste0("Convolution_", cell_type_1, "_vs_", cell_type_2, ".png"), plot = p)
}## Cell_Type Peak_Pseudotime Peak_Broadness
## 1 ADIPOGENESIS 9.437179 2
## 2 ALLOGRAFT_REJECTION 9.124698 2
## 3 ANDROGEN_RESPONSE 9.296048 3
## 4 ANGIOGENESIS 9.437179 3
## 5 APICAL_JUNCTION 9.050012 2
## 6 APICAL_SURFACE 9.399739 2
## 7 APOPTOSIS 8.580771 2
## 8 BILE_ACID_METABOLISM NA 24
## 9 CHOLESTEROL_HOMEOSTASIS 9.437179 3
## 10 COAGULATION 6.056476 3
## 11 COMPLEMENT 9.399739 2
## 12 DNA_REPAIR 9.437179 4
## 13 E2F_TARGETS 9.749659 11
## 14 EPITHELIAL_MESENCHYMAL_TRANSITION 8.777047 2
## 15 ESTROGEN_RESPONSE_EARLY NA 15
## 16 ESTROGEN_RESPONSE_LATE NA 3
## 17 FATTY_ACID_METABOLISM 9.437179 2
## 18 G2M_CHECKPOINT 9.709228 16
## 19 GLYCOLYSIS 9.244213 2
## 20 HEDGEHOG_SIGNALING NA 2
## 21 HEME_METABOLISM NA 6
## 22 HYPOXIA 9.244213 2
## 23 IL2_STAT5_SIGNALING 9.399739 2
## 24 IL6_JAK_STAT3_SIGNALING 9.399739 2
## 25 INFLAMMATORY_RESPONSE 9.124698 2
## 26 INTERFERON_ALPHA_RESPONSE 9.437179 3
## 27 INTERFERON_GAMMA_RESPONSE 9.399739 3
## 28 KRAS_SIGNALING_DN NA 23
## 29 KRAS_SIGNALING_UP 9.399739 2
## 30 MITOTIC_SPINDLE 9.773672 16
## 31 MTORC1_SIGNALING 9.709228 3
## 32 MYC_TARGETS_V1 9.709228 6
## 33 MYC_TARGETS_V2 9.709228 3
## 34 MYOGENESIS NA 27
## 35 NOTCH_SIGNALING 9.345042 3
## 36 OXIDATIVE_PHOSPHORYLATION 9.437179 3
## 37 P53_PATHWAY 9.437179 2
## 38 PANCREAS_BETA_CELLS NA 5
## 39 PEROXISOME 9.437179 3
## 40 PI3K_AKT_MTOR_SIGNALING 9.296048 2
## 41 PROTEIN_SECRETION 9.521545 3
## 42 REACTIVE_OXYGEN_SPECIES_PATHWAY NA 4
## 43 SPERMATOGENESIS NA 12
## 44 TGF_BETA_SIGNALING 9.050012 2
## 45 TNFA_SIGNALING_VIA_NFKB 9.399739 3
## 46 UNFOLDED_PROTEIN_RESPONSE 9.709228 2
## 47 UV_RESPONSE_DN NA 2
## 48 UV_RESPONSE_UP NA 3
## 49 WNT_BETA_CATENIN_SIGNALING 9.399739 3
## 50 XENOBIOTIC_METABOLISM NA 3
Which pathways have strong interaction with the regulatory T cell signaure
gsva.es_temp_new <- gsva.es_temp
# gsva.es_temp_new[, 1:50] <- scale(gsva.es_temp_new[, 1:50])
colnames(gsva.es_temp_new) <- gsub("HALLMARK_", "", colnames(gsva.es_temp_new))
# List of all cell types (excluding Cytotoxic T cells)
cell_types <- colnames(gsva.es_temp_new)[1:50] # Add your full list here
cell_type_1 <- "Cytotoxic_T_cell" # "Cytotoxic_T_cell" # Cytotoxic T cells are the fixed cell type
gsva.es_temp_new <- gsva.es_temp_new[rownames(gsvaPar_cell_markers.es), ]
all(rownames(gsva.es_temp_new) == rownames(gsvaPar_cell_markers.es))## [1] TRUE
# Ensure proper alignment between gsva.es_temp_new and gsvaPar_cell_markers.es
gsva.es_temp_new[, cell_type_1] <- gsvaPar_cell_markers.es[, cell_type_1]
gsva.es_temp_new %>%
correlate(method = "spearman") %>%
shave() %>%
rplot(print_cor = T) +
theme(axis.text.x = element_text(angle = 45,
vjust = 1,
hjust = 1))gsva.es_temp_new[, 1:50] <- scale(gsva.es_temp_new[, 1:50])
gsva.es_temp_new$log_pt <- log(gsva.es_temp_new$pt)
# Initialize a data frame to store the results for each cell type
results <- data.frame(Cell_Type = character(), Peak_Pseudotime = numeric(), Peak_Broadness = numeric())
# Loop through each cell type and perform the convolution analysis
for (cell_type_2 in cell_types) {
# Sort data by pseudotime (log_pt)
gsva.es_temp_new <- gsva.es_temp_new[order(gsva.es_temp_new$log_pt), ]
# Subset the data for non-tumor and tumor groups
non_tumor <- subset(gsva.es_temp_new, Group == "non-tumor")
# non_tumor[, 1:50] <- scale(non_tumor[, 1:50])
non_tumor <- non_tumor[order(non_tumor$log_pt), ]
tumor <- subset(gsva.es_temp_new, Group == "glioblastoma, grade 4")
# tumor[, 1:50] <- scale(tumor[, 1:50])
tumor <- tumor[order(tumor$log_pt), ]
# Perform convolution for tumor and non-tumor data with the selected cell types
conv_tumor <- convolve(tumor[[cell_type_1]], rev(tumor[[cell_type_2]]), type = "open")
conv_non_tumor <- convolve(non_tumor[[cell_type_1]], rev(non_tumor[[cell_type_2]]), type = "open")
# Find peaks for tumor and non-tumor data
if (!all(is.na(conv_tumor)) && length(unique(conv_tumor)) > 1) {
peak_tumor <- which.max(conv_tumor)
tumor_peak_pseudotime <- tumor$log_pt[peak_tumor]
# Calculate peak broadness for tumor
peak_value <- conv_tumor[peak_tumor]
half_max <- peak_value / 2 # Half maximum
# Find where convolution value is closest to half_max on both sides of the peak
left_index <- max(which(conv_tumor[1:peak_tumor] <= half_max), na.rm = TRUE)
right_index <- min(which(conv_tumor[peak_tumor:length(conv_tumor)] <= half_max), na.rm = TRUE) + peak_tumor - 1
tumor_peak_broadness <- right_index - left_index
} else {
tumor_peak_pseudotime <- NA
tumor_peak_broadness <- NA
}
if (!all(is.na(conv_non_tumor)) && length(unique(conv_non_tumor)) > 1) {
peak_non_tumor <- which.max(conv_non_tumor)
non_tumor_peak_pseudotime <- non_tumor$log_pt[peak_non_tumor]
} else {
non_tumor_peak_pseudotime <- NA
}
# Append the results to the data frame
results <- rbind(results, data.frame(
Cell_Type = cell_type_2,
Peak_Pseudotime = tumor_peak_pseudotime,
Peak_Broadness = tumor_peak_broadness
))
# Pad conv_non_tumor with zeros to match the length of conv_tumor
conv_non_tumor_padded <- c(conv_non_tumor, rep(0, length(conv_tumor) - length(conv_non_tumor)))
# Combine results into a data frame for plotting
comparison_data <- data.frame(
Index = 1:length(conv_tumor),
tumor = conv_tumor,
non_tumor = conv_non_tumor_padded
)
# Create the base plot
p <- ggplot(comparison_data, aes(x = Index)) +
geom_line(aes(y = tumor, color = "tumor"), size = 1.2) +
geom_line(aes(y = non_tumor, color = "non_tumor"), size = 1.2, linetype = "dashed") +
# Add smoothing using LOESS
geom_smooth(aes(y = tumor, color = "tumor"),
method = "loess", se = FALSE, span = 0.2, size = 1) +
geom_smooth(aes(y = non_tumor, color = "non_tumor"),
method = "loess", se = FALSE, span = 0.2, size = 1, linetype = "dashed") +
labs(
title = paste("Convolution Results:", cell_type_1, "vs", cell_type_2, "\nin Tumor vs Non-tumor"),
subtitle = paste(cell_type_1, "and", cell_type_2),
y = paste("Convolution Output (Interaction between", cell_type_1, "\nand", cell_type_2, ")"),
x = "Pseudotime (Ordered by log_pt)"
) +
scale_color_manual(values = c("tumor" = "red", "non_tumor" = "blue")) +
theme_minimal()
print(p)
# Conditionally add text annotation for the tumor peak (if valid)
if (!is.na(tumor_peak_pseudotime)) {
p <- p + annotate("text", x = peak_tumor, y = conv_tumor[peak_tumor],
label = paste0("Peak at Pseudotime: ", round(tumor_peak_pseudotime, 2)),
color = "red", vjust = -1.5, size = 4)
}
# Conditionally add text annotation for the non-tumor peak (if valid)
if (!is.na(non_tumor_peak_pseudotime)) {
p <- p + annotate("text", x = peak_non_tumor, y = conv_non_tumor_padded[peak_non_tumor],
label = paste0("Peak at Pseudotime: ", round(non_tumor_peak_pseudotime, 2)),
color = "blue", vjust = -1.5, size = 4)
}
# Print and save the plot for each cell type
print(p)
ggsave(filename = paste0("Convolution_", cell_type_1, "_vs_", cell_type_2, ".png"), plot = p)
}## Cell_Type Peak_Pseudotime Peak_Broadness
## 1 ADIPOGENESIS 3.765344 6
## 2 ALLOGRAFT_REJECTION 3.765344 9
## 3 ANDROGEN_RESPONSE 4.837055 13
## 4 ANGIOGENESIS 4.837055 10
## 5 APICAL_JUNCTION NA 6
## 6 APICAL_SURFACE 7.054422 10
## 7 APOPTOSIS 3.765344 10
## 8 BILE_ACID_METABOLISM NA 48
## 9 CHOLESTEROL_HOMEOSTASIS 6.181422 11
## 10 COAGULATION 4.837055 7
## 11 COMPLEMENT 4.837055 10
## 12 DNA_REPAIR 7.015352 31
## 13 E2F_TARGETS 7.015352 36
## 14 EPITHELIAL_MESENCHYMAL_TRANSITION 4.837055 10
## 15 ESTROGEN_RESPONSE_EARLY NA 23
## 16 ESTROGEN_RESPONSE_LATE NA 11
## 17 FATTY_ACID_METABOLISM NA 2
## 18 G2M_CHECKPOINT 7.369728 38
## 19 GLYCOLYSIS 7.054422 19
## 20 HEDGEHOG_SIGNALING 8.636104 41
## 21 HEME_METABOLISM NA 56
## 22 HYPOXIA 4.837055 10
## 23 IL2_STAT5_SIGNALING 4.837055 9
## 24 IL6_JAK_STAT3_SIGNALING 4.837055 10
## 25 INFLAMMATORY_RESPONSE 4.837055 10
## 26 INTERFERON_ALPHA_RESPONSE 3.765344 7
## 27 INTERFERON_GAMMA_RESPONSE 3.765344 7
## 28 KRAS_SIGNALING_DN NA 55
## 29 KRAS_SIGNALING_UP 4.837055 8
## 30 MITOTIC_SPINDLE 7.220172 39
## 31 MTORC1_SIGNALING 7.015352 21
## 32 MYC_TARGETS_V1 7.015352 33
## 33 MYC_TARGETS_V2 7.369728 30
## 34 MYOGENESIS NA 45
## 35 NOTCH_SIGNALING 3.765344 12
## 36 OXIDATIVE_PHOSPHORYLATION NA 3
## 37 P53_PATHWAY 4.837055 10
## 38 PANCREAS_BETA_CELLS NA 59
## 39 PEROXISOME 7.015352 3
## 40 PI3K_AKT_MTOR_SIGNALING 5.878181 17
## 41 PROTEIN_SECRETION 4.837055 9
## 42 REACTIVE_OXYGEN_SPECIES_PATHWAY NA 27
## 43 SPERMATOGENESIS 8.580771 37
## 44 TGF_BETA_SIGNALING 4.837055 7
## 45 TNFA_SIGNALING_VIA_NFKB 4.586848 9
## 46 UNFOLDED_PROTEIN_RESPONSE 6.087177 20
## 47 UV_RESPONSE_DN NA 5
## 48 UV_RESPONSE_UP 6.181422 9
## 49 WNT_BETA_CATENIN_SIGNALING 4.837055 12
## 50 XENOBIOTIC_METABOLISM NA 22
Most sustained cell-cell interaction signature
gsva.es_temp_new <- gsvaPar_cell_markers.es
# gsva.es_temp_new[, 1:50] <- scale(gsva.es_temp_new[, 1:50])
# List of all cell types (excluding Cytotoxic T cells)
cell_types <- colnames(gsvaPar_cell_markers.es)[1:57] # Add your full list here
cell_type_1 <- "Cytotoxic_T_cell" # "Cytotoxic_T_cell" # Cytotoxic T cells are the fixed cell type
# Initialize a data frame to store the results for each cell type
results <- data.frame(Cell_Type = character(), Peak_Pseudotime = numeric(), Peak_Broadness = numeric())
# Loop through each cell type and perform the convolution analysis
for (cell_type_2 in cell_types) {
# Sort data by pseudotime (log_pt)
gsva.es_temp_new <- gsva.es_temp_new[order(gsva.es_temp_new$log_pt), ]
# Subset the data for non-tumor and tumor groups
non_tumor <- subset(gsva.es_temp_new, Group == "non-tumor")
# non_tumor[, 1:50] <- scale(non_tumor[, 1:50])
non_tumor <- non_tumor[order(non_tumor$log_pt), ]
tumor <- subset(gsva.es_temp_new, Group == "glioblastoma, grade 4")
# tumor[, 1:50] <- scale(tumor[, 1:50])
tumor <- tumor[order(tumor$log_pt), ]
# Perform convolution for tumor and non-tumor data with the selected cell types
conv_tumor <- convolve(tumor[[cell_type_1]], rev(tumor[[cell_type_2]]), type = "open")
conv_non_tumor <- convolve(non_tumor[[cell_type_1]], rev(non_tumor[[cell_type_2]]), type = "open")
# Find peaks for tumor and non-tumor data
if (!all(is.na(conv_tumor)) && length(unique(conv_tumor)) > 1) {
peak_tumor <- which.max(conv_tumor)
tumor_peak_pseudotime <- tumor$log_pt[peak_tumor]
# Calculate peak broadness for tumor
peak_value <- conv_tumor[peak_tumor]
half_max <- peak_value / 2 # Half maximum
# Find where convolution value is closest to half_max on both sides of the peak
left_index <- max(which(conv_tumor[1:peak_tumor] <= half_max), na.rm = TRUE)
right_index <- min(which(conv_tumor[peak_tumor:length(conv_tumor)] <= half_max), na.rm = TRUE) + peak_tumor - 1
tumor_peak_broadness <- right_index - left_index
} else {
tumor_peak_pseudotime <- NA
tumor_peak_broadness <- NA
}
if (!all(is.na(conv_non_tumor)) && length(unique(conv_non_tumor)) > 1) {
peak_non_tumor <- which.max(conv_non_tumor)
non_tumor_peak_pseudotime <- non_tumor$log_pt[peak_non_tumor]
} else {
non_tumor_peak_pseudotime <- NA
}
# Append the results to the data frame
results <- rbind(results, data.frame(
Cell_Type = cell_type_2,
Peak_Pseudotime = tumor_peak_pseudotime,
Peak_Broadness = tumor_peak_broadness
))
# Pad conv_non_tumor with zeros to match the length of conv_tumor
conv_non_tumor_padded <- c(conv_non_tumor, rep(0, length(conv_tumor) - length(conv_non_tumor)))
# Combine results into a data frame for plotting
comparison_data <- data.frame(
Index = 1:length(conv_tumor),
tumor = conv_tumor,
non_tumor = conv_non_tumor_padded
)
# Create the base plot
p <- ggplot(comparison_data, aes(x = Index)) +
geom_line(aes(y = tumor, color = "tumor"), size = 1.2) +
geom_line(aes(y = non_tumor, color = "non_tumor"), size = 1.2, linetype = "dashed") +
labs(
title = paste("Convolution Results:", cell_type_1, "vs", cell_type_2, "\nin Tumor vs Non-tumor"),
subtitle = paste(cell_type_1, "and", cell_type_2),
y = paste("Convolution Output (Interaction between", cell_type_1, "\nand", cell_type_2, ")"),
x = "Index"
) +
scale_color_manual(values = c("tumor" = "red", "non_tumor" = "blue")) +
theme_minimal()
print(p)
# Conditionally add text annotation for the tumor peak (if valid)
if (!is.na(tumor_peak_pseudotime)) {
p <- p + annotate("text", x = peak_tumor, y = conv_tumor[peak_tumor],
label = paste0("Peak at Pseudotime: ", round(tumor_peak_pseudotime, 2)),
color = "red", vjust = -1.5, size = 4)
}
# Conditionally add text annotation for the non-tumor peak (if valid)
if (!is.na(non_tumor_peak_pseudotime)) {
p <- p + annotate("text", x = peak_non_tumor, y = conv_non_tumor_padded[peak_non_tumor],
label = paste0("Peak at Pseudotime: ", round(non_tumor_peak_pseudotime, 2)),
color = "blue", vjust = -1.5, size = 4)
}
# Print and save the plot for each cell type
print(p)
ggsave(filename = paste0("Convolution_", cell_type_1, "_vs_", cell_type_2, ".png"), plot = p)
}## Cell_Type Peak_Pseudotime Peak_Broadness
## 1 Microglial_cell NA 0
## 2 Stem_cell 2.608630 0
## 3 Fibroblast 2.608630 Inf
## 4 Sensory_neuron NA 75
## 5 Neuron NA 0
## 6 Neural_stem_cell 2.608630 0
## 7 Glial_cell NA 0
## 8 Neural_progenitor_cell 2.608630 0
## 9 Pericyte 2.608630 0
## 10 Oligodendrocyte NA 0
## 11 Endothelial_cell 2.608630 0
## 12 Macrophage 2.608630 0
## 13 Oligodendrocyte_progenitor_cell 2.608630 0
## 14 Astrocyte NA 0
## 15 Myeloid_cell 2.608630 0
## 16 Thalamic_cell NA 0
## 17 Progenitor_cell NA 0
## 18 Radial_glial_cell 2.608630 0
## 19 Excitatory_neuron NA 0
## 20 Inhibitory_neuron NA 46
## 21 T_cell NA 0
## 22 Oligodendrocyte_precursor_cell 2.608630 0
## 23 Mesenchymal_cell 2.608630 0
## 24 Cancer_cell NA 0
## 25 Interneuron NA 51
## 26 Cancer_stem_cell 2.608630 0
## 27 von_Economo_neuron 10.507548 78
## 28 CD4_T_cell 2.608630 0
## 29 Regulatory_T_cell NA 50
## 30 B_cell 10.507548 78
## 31 Tissueresident_macrophage 2.608630 0
## 32 Dendritic_cell NA 79
## 33 Infiltrating_macrophage 2.608630 Inf
## 34 Natural_killer_cell NA 77
## 35 Intermediate_progenitor_cell 10.507548 75
## 36 Monocyte 4.868329 4
## 37 CD8_T_cell 10.507548 76
## 38 Red_blood_cell_ NA 66
## 39 Lymphocyte NA 74
## 40 Mural_cell 2.608630 0
## 41 Effector_memory_CD4_T_cell NA 0
## 42 Naive_CD8_T_cell 10.507548 78
## 43 Memory_T_cell NA 0
## 44 Cytotoxic_T_cell NA 76
## 45 Natural_killer_T_cell 10.507548 76
## 46 Panneuronal_cell NA 0
## 47 Predendritic_cell NA 0
## 48 Conventional_dendritic_cell_2a 10.507548 72
## 49 Conventional_dendritic_cell_2b 2.608630 Inf
## 50 Lactotroph_cell 10.507548 78
## 51 Gonadotroph_cell 10.507548 78
## 52 Truncated_radial_glial_cell NA 0
## 53 Deep_layer_neuron 10.507548 76
## 54 Nonneuron 2.608630 0
## 55 Motor_neuron NA 76
## 56 Young_neuron NA 25
## 57 Effector_memory_T_cell NA 75
Pathway interactions
gsva.es_temp_new <- gsva.es_temp
gsva.es_temp_new$log_pt <- log(gsva.es_temp_new$pt)
gsva.es_temp_new[, 1:50] <- scale(gsva.es_temp_new[, 1:50])
colnames(gsva.es_temp_new) <- gsub("HALLMARK_",
"", colnames(gsva.es_temp_new))
# List of all cell types (excluding Cytotoxic T cells)
# Add your full list here)
cell_types <- gsub("HALLMARK_",
"", rownames(top_table_up_sig))
cell_type_1 <- "KRAS_SIGNALING_DN" # "Cytotoxic_T_cell" # Cytotoxic T cells are the fixed cell type
# Initialize a data frame to store the results for each cell type
results <- data.frame(Cell_Type = character(), Peak_Pseudotime = numeric(), Peak_Broadness = numeric())
# Loop through each cell type and perform the convolution analysis
for (cell_type_2 in cell_types) {
# Sort data by pseudotime (log_pt)
gsva.es_temp_new <- gsva.es_temp_new[order(gsva.es_temp_new$log_pt), ]
# Subset the data for non-tumor and tumor groups
non_tumor <- subset(gsva.es_temp_new, Group == "non-tumor")
# non_tumor[, 1:50] <- scale(non_tumor[, 1:50])
non_tumor <- non_tumor[order(non_tumor$log_pt), ]
tumor <- subset(gsva.es_temp_new, Group == "glioblastoma, grade 4")
# tumor[, 1:50] <- scale(tumor[, 1:50])
tumor <- tumor[order(tumor$log_pt), ]
# Perform convolution for tumor and non-tumor data with the selected cell types
conv_tumor <- convolve(tumor[[cell_type_1]],
rev(tumor[[cell_type_2]]), type = "open")
conv_non_tumor <- convolve(non_tumor[[cell_type_1]],
rev(non_tumor[[cell_type_2]]), type = "open")
# Find peaks for tumor and non-tumor data
if (!all(is.na(conv_tumor)) && length(unique(conv_tumor)) > 1) {
peak_tumor <- which.max(conv_tumor)
tumor_peak_pseudotime <- tumor$log_pt[peak_tumor]
# Calculate peak broadness for tumor
peak_value <- conv_tumor[peak_tumor]
half_max <- peak_value / 2 # Half maximum
# Find where convolution value is closest to half_max on both sides of the peak
left_index <- max(which(conv_tumor[1:peak_tumor] <= half_max), na.rm = TRUE)
right_index <- min(which(conv_tumor[peak_tumor:length(conv_tumor)] <= half_max), na.rm = TRUE) + peak_tumor - 1
tumor_peak_broadness <- right_index - left_index
} else {
tumor_peak_pseudotime <- NA
tumor_peak_broadness <- NA
}
if (!all(is.na(conv_non_tumor)) && length(unique(conv_non_tumor)) > 1) {
peak_non_tumor <- which.max(conv_non_tumor)
non_tumor_peak_pseudotime <- non_tumor$log_pt[peak_non_tumor]
} else {
non_tumor_peak_pseudotime <- NA
}
# Append the results to the data frame
results <- rbind(results, data.frame(
Cell_Type = cell_type_2,
Peak_Pseudotime = tumor_peak_pseudotime,
Peak_Broadness = tumor_peak_broadness
))
# Pad conv_non_tumor with zeros to match the length of conv_tumor
conv_non_tumor_padded <- c(conv_non_tumor, rep(0, length(conv_tumor) - length(conv_non_tumor)))
# Combine results into a data frame for plotting
comparison_data <- data.frame(
Index = 1:length(conv_tumor),
tumor = conv_tumor,
non_tumor = conv_non_tumor_padded
)
# Create the base plot
p <- ggplot(comparison_data, aes(x = Index)) +
geom_line(aes(y = tumor, color = "tumor"), size = 1.2) +
geom_line(aes(y = non_tumor, color = "non_tumor"), size = 1.2, linetype = "dashed") +
labs(
title = paste("Convolution Results:", cell_type_1, "vs", cell_type_2, "\nin Tumor vs Non-tumor"),
subtitle = paste(cell_type_1, "and", cell_type_2),
y = paste("Convolution Output (Interaction between", cell_type_1, "\nand", cell_type_2, ")"),
x = "Index"
) +
scale_color_manual(values = c("tumor" = "red", "non_tumor" = "blue")) +
theme_minimal()
print(p)
# Conditionally add text annotation for the tumor peak (if valid)
if (!is.na(tumor_peak_pseudotime)) {
p <- p + annotate("text", x = peak_tumor, y = conv_tumor[peak_tumor],
label = paste0("Peak at Pseudotime: ", round(tumor_peak_pseudotime, 2)),
color = "red", vjust = -1.5, size = 4)
}
# Conditionally add text annotation for the non-tumor peak (if valid)
if (!is.na(non_tumor_peak_pseudotime)) {
p <- p + annotate("text", x = peak_non_tumor, y = conv_non_tumor_padded[peak_non_tumor],
label = paste0("Peak at Pseudotime: ", round(non_tumor_peak_pseudotime, 2)),
color = "blue", vjust = -1.5, size = 4)
}
# Print and save the plot for each cell type
print(p)
ggsave(filename = paste0("Convolution_", cell_type_1, "_vs_", cell_type_2, ".png"), plot = p)
}## Cell_Type Peak_Pseudotime Peak_Broadness
## 1 NOTCH_SIGNALING 8.407143 12
## 2 APOPTOSIS 8.467514 7
## 3 ANGIOGENESIS 8.327232 17
## 4 P53_PATHWAY 8.327232 5
## 5 E2F_TARGETS 8.580771 32
## 6 DNA_REPAIR 8.580771 17
## 7 G2M_CHECKPOINT 8.580771 48
## 8 INTERFERON_ALPHA_RESPONSE 7.316267 2
## 9 INTERFERON_GAMMA_RESPONSE 7.316267 2
## 10 EPITHELIAL_MESENCHYMAL_TRANSITION 8.327232 18
## 11 KRAS_SIGNALING_DN NA 24
## 12 PI3K_AKT_MTOR_SIGNALING 8.572759 19
## 13 MYC_TARGETS_V1 8.580771 29
## 14 UNFOLDED_PROTEIN_RESPONSE 8.467514 16
## 15 IL6_JAK_STAT3_SIGNALING 7.316267 7
## 16 TGF_BETA_SIGNALING 8.404102 7
## 17 MITOTIC_SPINDLE 9.773672 11
## 18 MYC_TARGETS_V2 8.580771 20
## 19 MTORC1_SIGNALING 8.467514 17
## 20 COAGULATION 7.316267 5
## 21 GLYCOLYSIS 8.633564 10
## 22 ALLOGRAFT_REJECTION 7.316267 9
## 23 IL2_STAT5_SIGNALING 8.467514 14
## 24 WNT_BETA_CATENIN_SIGNALING 8.343410 9
## 25 ADIPOGENESIS 7.316267 4
## 26 HYPOXIA 8.467514 17
## 27 PANCREAS_BETA_CELLS NA 12
## 28 TNFA_SIGNALING_VIA_NFKB 8.467514 17
## 29 COMPLEMENT 7.316267 7
## 30 ANDROGEN_RESPONSE 8.467514 2
## 31 INFLAMMATORY_RESPONSE 7.316267 5
## 32 XENOBIOTIC_METABOLISM 7.316267 5
## 33 HEDGEHOG_SIGNALING 9.168972 6
## 34 ESTROGEN_RESPONSE_LATE 7.316267 5
## 35 KRAS_SIGNALING_UP 8.413909 7
## 36 CHOLESTEROL_HOMEOSTASIS 8.633564 8
## R version 4.3.3 (2024-02-29 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 22631)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=German_Germany.utf8 LC_CTYPE=German_Germany.utf8
## [3] LC_MONETARY=German_Germany.utf8 LC_NUMERIC=C
## [5] LC_TIME=German_Germany.utf8
##
## time zone: Europe/Berlin
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] readxl_1.4.3 viridis_0.6.5 viridisLite_0.4.2
## [4] corrr_0.4.4 robustbase_0.99-2 pheatmap_1.0.12
## [7] GSVA_1.50.5 escape_1.12.0 lubridate_1.9.3
## [10] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [13] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [16] tibble_3.2.1 tidyverse_2.0.0 ggplot2_3.5.0
## [19] openxlsx_4.2.5.2 hgu133plus2.db_3.13.0 org.Hs.eg.db_3.18.0
## [22] annotate_1.80.0 XML_3.99-0.16.1 AnnotationDbi_1.64.1
## [25] IRanges_2.36.0 S4Vectors_0.40.2 limma_3.58.1
## [28] affy_1.80.0 Biobase_2.62.0 BiocGenerics_0.48.1
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 UCell_2.6.2
## [3] rstudioapi_0.16.0 jsonlite_1.8.8
## [5] magrittr_2.0.3 farver_2.1.1
## [7] rmarkdown_2.26 ragg_1.3.2
## [9] zlibbioc_1.48.2 vctrs_0.6.5
## [11] memoise_2.0.1 DelayedMatrixStats_1.24.0
## [13] RCurl_1.98-1.14 htmltools_0.5.8.1
## [15] S4Arrays_1.2.1 BiocNeighbors_1.20.2
## [17] broom_1.0.5 cellranger_1.1.0
## [19] Rhdf5lib_1.24.2 SparseArray_1.2.4
## [21] rhdf5_2.46.1 sass_0.4.9
## [23] bslib_0.7.0 plyr_1.8.9
## [25] cachem_1.0.8 lifecycle_1.0.4
## [27] pkgconfig_2.0.3 rsvd_1.0.5
## [29] Matrix_1.6-5 R6_2.5.1
## [31] fastmap_1.1.1 GenomeInfoDbData_1.2.11
## [33] MatrixGenerics_1.14.0 digest_0.6.35
## [35] colorspace_2.1-0 patchwork_1.2.0
## [37] irlba_2.3.5.1 textshaping_0.3.7
## [39] GenomicRanges_1.54.1 RSQLite_2.3.7
## [41] beachmat_2.18.1 labeling_0.4.3
## [43] fansi_1.0.6 timechange_0.3.0
## [45] mgcv_1.9-1 httr_1.4.7
## [47] abind_1.4-5 compiler_4.3.3
## [49] bit64_4.0.5 withr_3.0.0
## [51] backports_1.4.1 BiocParallel_1.36.0
## [53] DBI_1.2.2 highr_0.10
## [55] HDF5Array_1.30.1 DelayedArray_0.28.0
## [57] tools_4.3.3 zip_2.3.1
## [59] msigdbr_7.5.1 glue_1.8.0
## [61] nlme_3.1-164 rhdf5filters_1.14.1
## [63] grid_4.3.3 reshape2_1.4.4
## [65] generics_0.1.3 gtable_0.3.4
## [67] tzdb_0.4.0 preprocessCore_1.64.0
## [69] data.table_1.15.4 hms_1.1.3
## [71] BiocSingular_1.18.0 ScaledMatrix_1.10.0
## [73] utf8_1.2.4 XVector_0.42.0
## [75] pillar_1.9.0 babelgene_22.9
## [77] splines_4.3.3 lattice_0.22-5
## [79] bit_4.0.5 tidyselect_1.2.1
## [81] SingleCellExperiment_1.24.0 Biostrings_2.70.3
## [83] knitr_1.46 gridExtra_2.3
## [85] SummarizedExperiment_1.32.0 xfun_0.43
## [87] statmod_1.5.0 matrixStats_1.3.0
## [89] DEoptimR_1.1-3 stringi_1.8.3
## [91] yaml_2.3.8 evaluate_0.23
## [93] codetools_0.2-19 BiocManager_1.30.23
## [95] graph_1.80.0 cli_3.6.3
## [97] affyio_1.72.0 systemfonts_1.1.0
## [99] xtable_1.8-4 munsell_0.5.1
## [101] jquerylib_0.1.4 Rcpp_1.0.12
## [103] GenomeInfoDb_1.38.8 png_0.1-8
## [105] parallel_4.3.3 blob_1.2.4
## [107] sparseMatrixStats_1.14.0 bitops_1.0-7
## [109] GSEABase_1.64.0 scales_1.3.0
## [111] ggridges_0.5.6 crayon_1.5.2
## [113] rlang_1.1.4 KEGGREST_1.42.0